# Table_A01-A06.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file produces Tables A1 through A6 in the appendix to the article. 
# The tables report basic descriptive statistics. There is one table for each
# of the main outcomes studied in the paper. The set of cases used for each 
# table is the set of cases that I use to estimate the baseline model.  

library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))  # for qw()
source('IV_setup.R')
# Outcomes to tabulate and, position by position, the name of the data frame
# that holds each one. (GSS.df and ANES.df are not created in this file;
# they are expected to exist once IV_setup.R has been sourced.)
varNames <- qw('eqwlth goveqinc guarantee.7pt govt.health.7pt helppoor welfare')
dfNames  <- qw('GSS.df GSS.df   ANES.df       ANES.df         GSS.df   GSS.df')

# Columns that enter every table. The first entry is a placeholder for the
# outcome; the loop below overwrites it on each iteration.
varList <- qw("eqwlth educ CA.fac male female white black otherRace bornInUS age yearYoung yearInt")

# Human-readable outcome labels, in the same order as varNames.
varNamesFull <- c(
  "Redistribution to poor (1) (GSS)",
  "Redistribution to poor (2) (GSS)",
  "Guaranteed standard of living (ANES)",
  "Health care (ANES)",
  "Help the poor (GSS)",
  "Welfare (GSS)")

# Row labels for the printed tables. "depvar" is a placeholder that the loop
# below replaces with the label of the current outcome. CA.fac occupies two
# rows because it is expanded into two indicator variables.
tableRownames <- c(
  "depvar",
  "Years of education",
  "Compulsory attendance $\\in \\{8, 9, 10\\}$",
  "Compulsory attendance $\\geq$ 11",
  "Male",
  "Female",
  "White",
  "Black",
  "Other race",
  "Born in the United States",
  "Age",
  "Year turned 14",
  "Year of interview")

# Add a male indicator (the complement of `female`) and one indicator per
# race category to a survey data frame, appended in the order male, white,
# black, otherRace.
addSexRaceIndicators <- function (survey) {
  survey$male <- !survey$female
  for (category in c('white', 'black', 'otherRace')) {
    survey[[category]] <- survey$race == category
  }
  survey
}
ANES.df <- addSexRaceIndicators(ANES.df)
GSS.df  <- addSexRaceIndicators(GSS.df)

# Accumulators filled by the loop below: the summary matrices and their
# LaTeX renderings, both keyed by outcome name.
tableList      <- list()
tableListLaTeX <- list()
# Build one summary-statistics table per outcome in varNames. Each pass
#   (1) pulls the relevant data frame and keeps the complete cases among
#       respondents whose state.contemp and stateYoung are both non-missing,
#   (2) replaces CA.fac with two indicator columns,
#   (3) computes N, mean, SD, min, and max for every column, and
#   (4) formats the result and stores it in tableList / tableListLaTeX.
# Side effects on objects defined before the loop: varList[1] and
# tableRownames[1] are overwritten on every pass.
for (i in seq_along(varNames)) {

  # Create data frames
  dfName     <- dfNames[i]
  varName    <- varNames[i]
  df.tmp     <- get(dfName)
  varList[1] <- varName  # the first slot of varList holds the current outcome
  hasStates  <- !is.na(df.tmp$state.contemp) & !is.na(df.tmp$stateYoung)
  df.tmp     <- df.tmp[hasStates, varList]
  df.tmp     <- df.tmp[complete.cases(df.tmp), ]

  # In df.tmp, turn CA.fac into dummy variables. The two indicators take the
  # place of CA.fac so that the column order matches tableRownames.
  # seq_len() and drop = FALSE keep the splice valid even if CA.fac is the
  # first or last column, or has only one neighbor on a side.
  CA.fac.ind <- which(colnames(df.tmp) == 'CA.fac')
  CA.fac1    <- df.tmp$CA.fac == '(7,10]'
  CA.fac2    <- df.tmp$CA.fac == '(10,100]'
  df.tmp     <- data.frame(
    df.tmp[, seq_len(CA.fac.ind - 1), drop = FALSE],
    CA.fac1,
    CA.fac2,
    df.tmp[, CA.fac.ind + seq_len(ncol(df.tmp) - CA.fac.ind), drop = FALSE])

  # Create table. apply() on a data frame converts it to a matrix on every
  # call, so do the conversion once, explicitly, and fail early if any column
  # prevents a numeric (or logical) matrix.
  dataMat <- as.matrix(df.tmp)
  if (!(is.numeric(dataMat) || is.logical(dataMat))) {
    stop(
      "Non-numeric column in the data for '", varName,
      "'; cannot compute summary statistics.",
      call. = FALSE)
  }
  summaryStats <- matrix(
    data     = NA,
    nrow     = ncol(df.tmp),
    ncol     = 5,
    dimnames = list(colnames(df.tmp), qw('N Mean SD Min Max')))
  summaryStats[, 'N']    <- nrow(dataMat)  # no NAs remain, so N is the row count
  summaryStats[, 'Mean'] <- apply(dataMat, 2, mean)
  summaryStats[, 'SD']   <- apply(dataMat, 2, sd)
  summaryStats[, 'Min']  <- apply(dataMat, 2, min)
  summaryStats[, 'Max']  <- apply(dataMat, 2, max)

  # Format table. The latexTable() function doesn't handle the table in quite
  # the way that I want, so I need to do some advance work.
  summaryStats <- round(summaryStats, 2)
  # Assigning character values to one column converts the ENTIRE matrix to
  # character, which is why the SD column can be passed to gsub() below.
  summaryStats[, 'Mean'] <- as.character(summaryStats[, 'Mean'])
  for (statName in c('Mean', 'SD')) {
    # Drop the leading zero (0.25 -> .25), then pad values that have a single
    # decimal digit with a trailing zero (.5 -> .50). In the replacement,
    # '\\10' is backreference 1 followed by a literal 0.
    noLeadingZero            <- gsub('^0\\.', '.', summaryStats[, statName])
    summaryStats[, statName] <- gsub('(\\.\\d)$', '\\10', noLeadingZero)
  }

  # Add table to lists
  tableList[[varName]] <- summaryStats
  tableRownames[1]     <- varNamesFull[i]
  # Strip the literal ".7pt" suffix (fixed = TRUE: the dot is not a wildcard).
  commandNameVarName   <- gsub('.7pt', '', varName, fixed = TRUE)
  tableListLaTeX[[varName]] <- latexTable(
    mat                = summaryStats,
    SETable            = FALSE,
    formatNumbers      = FALSE,
    hspace             = '-.2in',
    rowNames           = tableRownames,
    colNames           = colnames(summaryStats),
    colNameExpand      = FALSE,
    spacerColumns      = 0:4,
    spacerColumnsWidth = qw('.5em .5em 1em .5em .5em'),
    headerFooter       = TRUE,
    footerRows         = NULL,
    caption            = paste0(
      "\\textit{Summary statistics for baseline-model analysis of ``",
      gsub(' \\(\\w\\w\\w\\w?\\)', '', tolower(varNamesFull[i])),  # eliminate, e.g., " (gss)" from end of name
      "'' (",                                                      # close quotation marks, open parenthesis
      sub('\\.df$', '', dfName),                                   # add, e.g., "GSS"
      ").}  "),
    # Only the "guarantee.7pt" table (the third one) gets custom caption
    # margins. Keying on the name keeps this correct if varNames is reordered.
    captionMargins     = if (varName == 'guarantee.7pt') qw("-.25in -.5in") else NULL,
    commandName        = paste0('TabSummaryStats', gsub('\\.', '', commandNameVarName)),
    callCommand        = FALSE)
}


# Make some spacing adjustments to the columns.
# In each table's LaTeX code, find the first line that contains the literal
# text {{\hspace*{0em}}} and patch four lines at fixed offsets from it
# (0, +2, +6, +8). Every patch is a literal (fixed = TRUE) replacement of the
# first occurrence on that line. If the anchor line is missing, or an offset
# points past the end of the code, a warning is issued and that patch is
# skipped, so the code is never padded with NA entries.
tableListLaTeX <- lapply(
  X   = tableListLaTeX,
  FUN = function (x) {
    rowToChange <- grep('{{\\hspace*{0em}}}', x, fixed = TRUE)[1]
    if (is.na(rowToChange)) {
      warning(
        "Spacing anchor not found in LaTeX table code; table left unchanged.",
        call. = FALSE)
      return(x)
    }

    # offset from the anchor line, text to replace, replacement text
    spacingEdits <- list(
      list(offset = 0, old = '{0em}',  new = '{1.05em}'),
      list(offset = 2, old = '{4}{2}', new = '{4}{3}'),
      list(offset = 6, old = '{0em}',  new = '{.75em}'),
      list(offset = 8, old = '{0em}',  new = '{.75em}'))
    for (edit in spacingEdits) {
      targetRow <- rowToChange + edit$offset
      if (targetRow > length(x)) {
        warning(
          "LaTeX table code is shorter than expected; skipped the spacing ",
          "adjustment at anchor offset ", edit$offset, ".",
          call. = FALSE)
        next
      }
      x[targetRow] <- sub(edit$old, edit$new, x[targetRow], fixed = TRUE)
    }
    x
  })



# MAKE LATEX FILE
# Hand the six adjusted LaTeX table commands to latexTablePDF(), which is not
# defined in this file (presumably it comes from the Bullock package loaded
# at the top -- confirm). Judging by the argument names, the call keeps the
# .tex file, discards the PDF, overwrites earlier output, and writes to the
# stem 'float_output/Table_A01-A06'. This presumably requires the
# float_output/ directory to exist in the working directory -- TODO confirm.
latexTablePDF(
  latexCommands      = tableListLaTeX, 
  firstPageEmpty     = FALSE,
  wrapper            = FALSE,
  outputFilenameStem = 'float_output/Table_A01-A06',     
  overwriteExisting  = TRUE,
  keepPDFFile        = FALSE,
  keepTexFile        = TRUE,
  openPDFOnExit      = FALSE)
